{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 百度AI开放平台—OCR（光学字符识别）\n",
    "# 学生实践周\n",
    "## 通用文字识别\n",
    "* 接口描述\n",
    "> 在通用文字识别的基础上，提供更高精度的识别服务，支持更多语种识别（丹麦语、荷兰语、马来语、瑞典语、印尼语、波兰语、罗马尼亚语、土耳其语、希腊语、匈牙利语、泰语、越语、阿拉伯语、印地语及部分中国少数民族语言），并将字库从1w+扩展到2w+，能识别所有常用字和大部分生僻字。\n",
    "\n",
    "* 请求示例\n",
    "> 1.HTTP方法：POST\n",
    "> 2.请求URL：https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic\n",
    "\n",
    "* 请求参数\n",
    "> 1.url参数-token\n",
    "> 2.图片参数-图片文件\n",
    "\n",
    "* 示例代码1\n",
    "\n",
    "```python\n",
    "# encoding:utf-8\n",
    "\n",
    "import requests\n",
    "import base64\n",
    "\n",
    "'''\n",
    "通用文字识别（高精度版）\n",
    "'''\n",
    "\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic\"\n",
    "# 二进制方式打开图片文件\n",
    "f = open('[本地文件]', 'rb')\n",
    "img = base64.b64encode(f.read())\n",
    "\n",
    "params = {\"image\":img}\n",
    "access_token = '[调用鉴权接口获取的token]'\n",
    "request_url = request_url + \"?access_token=\" + access_token\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "response = requests.post(request_url, data=params, headers=headers)\n",
    "if response:\n",
    "    print (response.json())\n",
    "```\n",
    "\n",
    "* 示例代码2\n",
    "\n",
    "```python\n",
    "# encoding:utf-8\n",
    "\n",
    "import requests\n",
    "import base64\n",
    "\n",
    "'''\n",
    "通用物体和场景识别\n",
    "'''\n",
    "\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/image-classify/v2/advanced_general\"\n",
    "# 二进制方式打开图片文件\n",
    "# 1.图片文件准备\n",
    "f = open('xihu.jpg', 'rb')\n",
    "img = base64.b64encode(f.read())\n",
    "\n",
    "# 2. 酬载准备\n",
    "payload={\n",
    "    'access_token':zhichao_AT,\n",
    "    'image':img,\n",
    "    'baike_num':5\n",
    "}\n",
    "\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "response = requests.post(request_url, data=payload, headers=headers)\n",
    "if response:\n",
    "    print (response.json())\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'refresh_token': '25.944bb5a928061d1ee6bd56aa5380dd90.315360000.1965128134.282335-25954485', 'expires_in': 2592000, 'session_key': '9mzdCyzjv7wbOAvkVifJeFB5/5+Z200m5n5/WUk+As8sfR46azqs5xXuqszMwVmxM1Z+yKlE4PPty5bU32w9e2+Ie0GbhQ==', 'access_token': '24.04ab5778628a0bc76cf961076dd3b55d.2592000.1652360134.282335-25954485', 'scope': 'public vis-ocr_ocr brain_ocr_scope brain_ocr_general brain_ocr_general_basic vis-ocr_business_license brain_ocr_webimage brain_all_scope brain_ocr_idcard brain_ocr_driving_license brain_ocr_vehicle_license vis-ocr_plate_number brain_solution brain_ocr_plate_number brain_ocr_accurate brain_ocr_accurate_basic brain_ocr_receipt brain_ocr_business_license brain_solution_iocr brain_qrcode brain_ocr_handwriting brain_form brain_ocr_passport brain_ocr_vat_invoice brain_numbers brain_ocr_business_card brain_ocr_train_ticket brain_ocr_taxi_receipt vis-ocr_household_register vis-ocr_vis-classify_birth_certificate vis-ocr_台湾通行证 vis-ocr_港澳通行证 vis-ocr_机动车购车发票识别 vis-ocr_机动车检验合格证识别 vis-ocr_车辆vin码识别 vis-ocr_定额发票识别 vis-ocr_保单识别 vis-ocr_机打发票识别 vis-ocr_行程单识别 brain_ocr_vin brain_ocr_quota_invoice brain_ocr_birth_certificate brain_ocr_household_register brain_ocr_HK_Macau_pass brain_ocr_taiwan_pass brain_ocr_vehicle_invoice brain_ocr_vehicle_certificate brain_ocr_air_ticket brain_ocr_invoice brain_ocr_insurance_doc brain_formula brain_seal brain_ocr_facade brain_ocr_meter brain_doc_analysis brain_ocr_webimage_loc brain_ocr_medical_paper brain_ocr_doc_analysis_office brain_vat_invoice_verification brain_ocr_medical_detail brain_vehicle_registration_certificate brain_ocr_online_taxi_itinerary brain_ocr_multi_idcard brain_ocr_mixed_multi_vehicle brain_ocr_weigth_note brain_ocr_ multiple_invoice wise_adapt lebo_resource_base lightservice_public hetu_basic lightcms_map_poi kaidian_kaidian ApsMisTest_Test权限 vis-classify_flower lpq_开放 cop_helloScope ApsMis_fangdi_permission smartapp_snsapi_base smartapp_mapp_dev_manage iop_autocar oauth_tp_app 
smartapp_smart_game_openapi oauth_sessionkey smartapp_swanid_verify smartapp_opensource_openapi smartapp_opensource_recapi fake_face_detect_开放Scope vis-ocr_虚拟人物助理 idl-video_虚拟人物助理 smartapp_component smartapp_search_plugin avatar_video_test b2b_tp_openapi b2b_tp_openapi_online', 'session_secret': 'ec53e0e0c111fbeb74364c6ae42adb27'}\n"
     ]
    }
   ],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "import requests\n",
    "\n",
    "# client_id is the API Key (AK) and client_secret is the Secret Key (SK) obtained\n",
    "# from the official console. They are read from the environment (with an\n",
    "# interactive prompt as a fallback) so the secrets are never stored in the notebook.\n",
    "api_key = os.environ.get('BAIDU_API_KEY') or getpass.getpass('Baidu API Key (AK): ')\n",
    "secret_key = os.environ.get('BAIDU_SECRET_KEY') or getpass.getpass('Baidu Secret Key (SK): ')\n",
    "\n",
    "host = 'https://aip.baidubce.com/oauth/2.0/token'\n",
    "payload = {\n",
    "    'grant_type': 'client_credentials',\n",
    "    'client_id': api_key,\n",
    "    'client_secret': secret_key,\n",
    "}\n",
    "# `response` stays a notebook-level name: the next cell reads the token from it.\n",
    "response = requests.get(host, params=payload, timeout=30)\n",
    "# Fail loudly on an HTTP error instead of silently printing nothing.\n",
    "response.raise_for_status()\n",
    "\n",
    "# Print only a non-secret field; the full JSON body carries the access/refresh tokens.\n",
    "print({'expires_in': response.json().get('expires_in')})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'24.04ab5778628a0bc76cf961076dd3b55d.2592000.1652360134.282335-25954485'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pull the token out of the OAuth response produced by the previous cell.\n",
    "access_token = response.json()['access_token']\n",
    "# Confirm it loaded without echoing the secret into the saved notebook output.\n",
    "f'access_token loaded ({len(access_token)} characters)'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 通用文字识别（高精度版）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'words_result': [{'words': '第田次'},\n",
       "  {'words': '定犯3'},\n",
       "  {'words': '却借由别人也会犯销'},\n",
       "  {'words': '来宽慰自己'},\n",
       "  {'words': '@3T'}],\n",
       " 'words_result_num': 5,\n",
       " 'log_id': 1513863404096148429}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import base64\n",
    "\n",
    "import requests\n",
    "\n",
    "# General text recognition, high-accuracy variant.\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic\"\n",
    "\n",
    "# Open the image in binary mode; `with` closes the handle once it has been read.\n",
    "with open('F:/桌面/API/week06/ty.jpg', 'rb') as f:\n",
    "    img = base64.b64encode(f.read())\n",
    "\n",
    "params = {\"image\": img}\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "# Authenticate with the `access_token` fetched by the token cell above rather than\n",
    "# a pasted literal, which goes stale once the token expires.\n",
    "response = requests.post(\n",
    "    request_url,\n",
    "    params={\"access_token\": access_token},\n",
    "    data=params,\n",
    "    headers=headers,\n",
    "    timeout=30,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP failures instead of showing nothing\n",
    "display(response.json())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 网络图片文字识别"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'words_result': [{'words': '生命短暂'},\n",
       "  {'words': '我们连爱好它也来不及'},\n",
       "  {'words': '便可能已过完一生'},\n",
       "  {'words': '应好好珍惜'},\n",
       "  {'words': '别糖尼'}],\n",
       " 'words_result_num': 5,\n",
       " 'log_id': 1513863413531122200}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import base64\n",
    "\n",
    "import requests\n",
    "\n",
    "# Web-image text recognition. The original cell reused the accurate_basic URL from\n",
    "# the previous section; this section is about the dedicated web-image endpoint.\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/webimage\"\n",
    "\n",
    "# Open the image in binary mode; `with` closes the handle once it has been read.\n",
    "with open('F:/桌面/API/week06/wztp.jpg', 'rb') as f:\n",
    "    img = base64.b64encode(f.read())\n",
    "\n",
    "params = {\"image\": img}\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "# Authenticate with the `access_token` fetched by the token cell above rather than\n",
    "# a pasted literal, which goes stale once the token expires.\n",
    "response = requests.post(\n",
    "    request_url,\n",
    "    params={\"access_token\": access_token},\n",
    "    data=params,\n",
    "    headers=headers,\n",
    "    timeout=30,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP failures instead of showing nothing\n",
    "display(response.json())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 手写文字识别"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'words_result': [{'location': {'top': 174,\n",
       "    'left': 62,\n",
       "    'width': 304,\n",
       "    'height': 61},\n",
       "   'words': '经济独立是真丽'},\n",
       "  {'location': {'top': 251, 'left': 59, 'width': 342, 'height': 59},\n",
       "   'words': '很有安全感的一件事.'}],\n",
       " 'words_result_num': 2,\n",
       " 'log_id': 1513863416304507677}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import base64\n",
    "\n",
    "import requests\n",
    "\n",
    "# Handwriting recognition endpoint.\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting\"\n",
    "\n",
    "# Open the image in binary mode; `with` closes the handle once it has been read.\n",
    "with open('F:/桌面/API/week06/sx.jpg', 'rb') as f:\n",
    "    img = base64.b64encode(f.read())\n",
    "\n",
    "params = {\"image\": img}\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "# Authenticate with the `access_token` fetched by the token cell above rather than\n",
    "# a pasted literal, which goes stale once the token expires.\n",
    "response = requests.post(\n",
    "    request_url,\n",
    "    params={\"access_token\": access_token},\n",
    "    data=params,\n",
    "    headers=headers,\n",
    "    timeout=30,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP failures instead of showing nothing\n",
    "display(response.json())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 二维码识别\n",
    "[草料二维码生成器](https://cli.im/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'codes_result': [{'text': ['我真的会谢呀'], 'type': 'QR_CODE'}],\n",
       " 'codes_result_num': 1,\n",
       " 'log_id': 1513863417789894459}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import base64\n",
    "\n",
    "import requests\n",
    "\n",
    "# QR code recognition endpoint.\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/qrcode\"\n",
    "\n",
    "# Open the image in binary mode; `with` closes the handle once it has been read.\n",
    "with open('F:/桌面/API/week06/ewm.png', 'rb') as f:\n",
    "    img = base64.b64encode(f.read())\n",
    "\n",
    "params = {\"image\": img}\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "# Authenticate with the `access_token` fetched by the token cell above rather than\n",
    "# a pasted literal, which goes stale once the token expires.\n",
    "response = requests.post(\n",
    "    request_url,\n",
    "    params={\"access_token\": access_token},\n",
    "    data=params,\n",
    "    headers=headers,\n",
    "    timeout=30,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP failures instead of showing nothing\n",
    "display(response.json())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 卡片类文字识别\n",
    "* 身份证&银行卡"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'words_result': {'姓名': {'location': {'top': 455,\n",
       "    'left': 701,\n",
       "    'width': 229,\n",
       "    'height': 130},\n",
       "   'words': '常涛'},\n",
       "  '民族': {'location': {'top': 683, 'left': 1210, 'width': 87, 'height': 104},\n",
       "   'words': '汉'},\n",
       "  '住址': {'location': {'top': 1088, 'left': 686, 'width': 1070, 'height': 249},\n",
       "   'words': '河北省邯郸市肥乡县肥乡镇'},\n",
       "  '公民身份号码': {'location': {'top': 1598,\n",
       "    'left': 1103,\n",
       "    'width': 1395,\n",
       "    'height': 134},\n",
       "   'words': '411526199706013217'},\n",
       "  '出生': {'location': {'top': 875, 'left': 689, 'width': 850, 'height': 106},\n",
       "   'words': '19970601'},\n",
       "  '性别': {'location': {'top': 683, 'left': 714, 'width': 93, 'height': 106},\n",
       "   'words': '男'}},\n",
       " 'words_result_num': 6,\n",
       " 'idcard_number_type': 1,\n",
       " 'image_status': 'normal',\n",
       " 'log_id': 1513863428393319557}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import base64\n",
    "\n",
    "import requests\n",
    "\n",
    "# ID card recognition endpoint.\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/idcard\"\n",
    "\n",
    "# Open the image in binary mode; `with` closes the handle once it has been read.\n",
    "with open('F:/桌面/API/week06/sfz.jpg', 'rb') as f:\n",
    "    img = base64.b64encode(f.read())\n",
    "\n",
    "# `id_card_side` tells the service which face of the card the image shows.\n",
    "params = {\"id_card_side\": \"front\", \"image\": img}\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "# Authenticate with the `access_token` fetched by the token cell above rather than\n",
    "# a pasted literal, which goes stale once the token expires.\n",
    "response = requests.post(\n",
    "    request_url,\n",
    "    params={\"access_token\": access_token},\n",
    "    data=params,\n",
    "    headers=headers,\n",
    "    timeout=30,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP failures instead of showing nothing\n",
    "# NOTE(review): the result contains personal data; clear this output before sharing.\n",
    "display(response.json())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 财务类文字识别-增值税发票识别"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "\n",
    "import requests\n",
    "\n",
    "# VAT invoice recognition endpoint.\n",
    "request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice\"\n",
    "\n",
    "# Open the image in binary mode; `with` closes the handle once it has been read.\n",
    "with open('F:/桌面/API/week06/fp.jpg', 'rb') as f:\n",
    "    img = base64.b64encode(f.read())\n",
    "\n",
    "params = {\"image\": img}\n",
    "headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "# Authenticate with the `access_token` fetched by the token cell above rather than\n",
    "# a pasted literal, which goes stale once the token expires.\n",
    "response = requests.post(\n",
    "    request_url,\n",
    "    params={\"access_token\": access_token},\n",
    "    data=params,\n",
    "    headers=headers,\n",
    "    timeout=30,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP failures instead of showing nothing\n",
    "display(response.json())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
